---
title: "Cleaning Middle East Values Study: Religious Fundamentalism, Attitudes toward Political Violence, and Developmental Idealism among Youth in Egypt and Saudi Arabia"
---

```{r}
# load packages (via the project helper script)
  source("helper-packages.R")

# read the raw youth survey file (.dta) into `youth_raw`
  youth_raw <- import(
    "../raw-data/y-multi-middle-east-values-study/religious-fundamentalism-attitudes-toward-pol-violence-youth/Youth Survey in Egypt and Saudi Arabia.dta"
  )
```

# Clean

```{r}
# declare fieldwork dates
#   One row per country with the first and last interview dates; the table is
#   joined onto the respondent-level data by `resp_country_common`.
#   Dates are written in ISO 8601 form and parsed with a numeric-only format so
#   that parsing does not depend on the session locale (month-name formats such
#   as "%b" are matched against the current locale's month names and yield NA
#   under a non-English locale).
  youth_dates <- 
    tribble(
      ~resp_country_common, ~resp_interview_start_date, ~resp_interview_end_date,
      "Saudi Arabia", "2005-07-10", "2005-07-25",
      "Egypt", "2005-05-05", "2005-06-20") %>% 
    mutate(
      resp_interview_start_date = as.Date(resp_interview_start_date, format = "%Y-%m-%d"),
      resp_interview_end_date = as.Date(resp_interview_end_date, format = "%Y-%m-%d"))

# clean
#   Build the harmonised respondent-level table from `youth_raw`.
#   Every derived column is prefixed `resp_`; the final `select()` keeps only
#   those columns (including the start/end dates joined in from `youth_dates`).
  youth_clean <- 
    youth_raw %>% 
    mutate(
      
    #########################  
    ####### META-DATA #######  
    #########################      
      
      # source name (character vector, title case)
        resp_source = "The Youth, Emotional Energy, and Political Violence: The Cases of Egypt and Saudi Arabia",
        
      # round number (character vector, title case)  
        resp_round = "",
      
      # url to dataset source, where publicly available (character vector)
        resp_original_data_url = "bit.ly/38O5jjL",

      # survey mode (in-person/phone)
        resp_survey_mode = "in-person",

      # country (character vector; list of countries as written in original source)
        resp_country_original = 
          dplyr::recode(
            as.character(CORIGIN),
            "1" = "Saudi Arabia",
            "2" = "Egypt"),

      # country (character vector; converts to countrycode country.name list)
        resp_country_common = 
          countryname(resp_country_original),
        
      # interview date (variable of class Date; if only month given, input 1st of month)
      # no per-respondent date is set here, so the column is a typed missing
      # Date (not a logical NA) to match the documented class
        resp_interview_date = as.Date(NA)) %>% 
        # attach the country-level fieldwork start/end dates
        left_join(
          youth_dates, by = "resp_country_common") %>% 
        mutate(
          
    #########################  
    ##### DEMOGRAPHICS ######  
    #########################
      
      # respondent's religion (character vector that corresponds to master list)
        resp_religion = "Muslim", # strongly implied in q114 that all respondents were muslims

      # respondent's denomination (character vector that corresponds to master list)
        resp_denomination = "Muslim", # strongly implied in q114 that all respondents were muslims

      # respondent's age (character vector; bins denoted by single dash ["18-25"])
        resp_age =
          dplyr::recode(
            as.character(v195), # note that the numbering is different
            "999" = NA_character_),        
      
      # respondent's education level
      # NOTE(review): codes 5, 7 and 8 (completed secondary / some university)
      # carry the [Primary] tag -- confirm this is the intended harmonised level
        resp_education_original =
          dplyr::recode(
            as.character(v196),
            "1" = "1. No formal education [No education]", # note that the frequencies in the codebook don't match; but this is the only way that makes sense
            "2" = "2. Incomplete primary school [No education]",
            "3" = "3. Complete primary school [Primary]",
            "4" = "4. Incomplete secondary school: technical/vocational type [Primary]",
            "5" = "5. Complete secondary school: technical/vocational type [Primary]",
            "6" = "6. Incomplete secondary: university-preparatory type [Primary]",
            "7" = "7. Complete secondary: university-preparatory type [Primary]",
            "8" = "8. Some university-level education, without degree [Primary]",
            "9" = "9. University-level education, with degree [College]",
            .default = NA_character_),       
      
      # respondent's gender (numeric: female = 1; male = 0; other = NA)
        resp_female = 
          case_when(
            v193 == 1 ~ 0,
            v193 == 2 ~ 1,
            TRUE ~ NA_real_),
      
      # respondent resident in rural (vs urban) area (numeric: rural = 1; urban/semi-urban/peri-urban = 0)
        resp_rural = 
          case_when(
            v200 %in% c(1, 2) ~ 1,
            v200 %in% c(3:5) ~ 0,
            TRUE ~ NA_real_),
      
    #########################  
    ### SOCIAL DISTANCE 1 ###  
    #########################
    
      # original question number; question text; response options (input above)
        resp_soc_dist_1_qinfo = "NUM: v89 [v86]; QTEXT: If it were possible, I'd rather have a job where I worked with people with the same religious views I have rather than with people with different views.; ROPTIONS: 1 = Strongly agree [=1] + 2 = Agree [=1] + 3 Disagree [=0] + 4 = Strongly disagree [=0]; TARGET: Different religion; TYPE: Distance, work",
      
      # original response (as character vector)
      # codes 9, 97 and 999 are treated as non-substantive and set to missing
        resp_soc_dist_1_original = 
          dplyr::recode(
            as.character(v89),
            "9" = NA_character_,
            "97" = NA_character_,
            "999" = NA_character_),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_soc_dist_1_bin_recode = 
          case_when(
            v89 %in% c(1:2) ~ 1,
            v89 %in% c(3:4) ~ 0,
            TRUE ~ NA_real_),
    
    ############################  
    ### GENERAL SOCIAL TRUST ###  
    ############################
    
      # original question number; question text; response options (input above)
      # placeholders are typed NAs so the column classes match the documented
      # types (character / character / numeric) rather than logical
        resp_gentrust_qinfo = NA_character_, # checked, no general trust question asked

      # original response (as character vector)
        resp_gentrust_original = NA_character_,       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_gentrust_bin_recode = NA_real_,    
    
    #########################  
    ##### RELIGIOSITY #######  
    #########################
    
      # original question number; question text; response options (input above)
      # NOTE(review): qinfo lists "v99 [v102]" but the recodes below read v102,
      # whereas the social-distance item reads the first-listed variable (v89);
      # confirm which variable holds this question in the data file
        resp_religiosity_qinfo = "NUM: v99 [v102]; QTEXT: Independently of whether you go to religious services or not, would you say you are...; ROPTIONS: 1 = A religious person + 2 = Not a religious person + 3 = A convinced atheist (not believing in God)",
  
      # original response (as numeric vector, with non-substantive responses coded as NA_real_)
        resp_religiosity_original = 
          dplyr::recode(
            as.numeric(v102),
            `9` = NA_real_,
            `97` = NA_real_,
            `999` = NA_real_),       

      # recode (numeric: scaled 0-1, where 1 is more religious)
      # any other code falls through to NA (made explicit to match the
      # other recodes in this script)
        resp_religiosity_recode = 
          case_when(
            v102 == 1 ~ 1,
            v102 %in% c(2, 3) ~ 0,
            TRUE ~ NA_real_
          )
    
    ) %>% 
    # keep only the harmonised output columns
    select(starts_with("resp_"))
```

# Save data

```{r}
  # write the cleaned respondent-level table to disk as an .rds file
  saveRDS(
    object = youth_clean,
    file = "../cleaned-data/y-11-multi-middle-east-values-study-rfapv.rds")
```
